In order to understand naturally occurring languages, we consider
models of finite languages $\rX$ consisting of strings of fixed
finite length $N$, together with a probability function $P$ which
models the natural language.  In what follows, for two strings $x$
and $y$ we denote their concatenation by $xy$.
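
Throughout these exercises, entropy is taken with respect to base-$2$
logarithms; assuming the standard definition used earlier in the chapter,
the entropy of a language $\rX$ with probability function $P$ is
$$
H(\rX) = -\sum_{x\in\rX} P(x)\log_2 P(x),
$$
with the convention that terms with $P(x) = 0$ are omitted.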

\begin{exercise}
\label{ex:information_theory:maximum_entropy}
Show that the maximum entropy of a probability function on bit
strings of length $N$ is $N$.
\end{exercise}
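
As a sketch of the extremal case (this verifies that the bound is attained,
not that it is maximal): for the uniform distribution $P(x) = 2^{-N}$ on the
$2^N$ bit strings,
$$
H(\rX) = -\sum_{x\in\{0,1\}^N} 2^{-N}\log_2 2^{-N} = 2^N\cdot 2^{-N}\cdot N = N.
$$
Maximality can then be deduced, for instance, from Jensen's inequality
applied to the concave function $\log_2$.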

\begin{exercise}
\label{ex:information_theory:uniform_probability_space}
Show that the rate of a uniform probability space is $1$ and that this is
the maximal value for any probability space.
\end{exercise}
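
Assuming the rate of a probability space $X$ is defined as $H(X)$ divided by
$\log_2$ of the size of the space (the definition may be stated differently
earlier in the chapter), the uniform case reduces to the computation
$$
H(X) = -\sum_{x\in X}\frac{1}{n}\log_2\frac{1}{n} = \log_2 n,
\quad\hbox{where}\quad n = |X|,
$$
so the rate is $\log_2 n/\log_2 n = 1$, and maximality reduces to the bound
$H(X)\le\log_2 n$.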

\begin{exercise}
\label{ex:information_theory:probability_function_on_ciphertext_space}
For a given cryptosystem, show that the definition
$$
P(y) = \sum_{K\in\cK} P(K)
       \sum_{\substack{x\in\cM \\ E_K(x)=y}} P(x)
$$
determines a probability function on the ciphertext space.
Then verify the equalities:
$$
P(y) = \sum_{x\in\cM} P(x,y), \quad\hbox{and}\quad P(x) = \sum_{y\in\cC} P(x,y).
$$
\end{exercise}
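
A sketch of the first verification, assuming keys and plaintexts are drawn
independently so that the joint probability is
$P(x,y) = P(x)\sum_{K:\,E_K(x)=y} P(K)$: for each fixed key $K$, every
plaintext $x$ encrypts to exactly one ciphertext $E_K(x)$, so summing over
$y$ counts each $x$ exactly once, and
$$
\sum_{y\in\cC} P(y) = \sum_{K\in\cK} P(K) \sum_{x\in\cM} P(x) = 1.
$$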

\begin{exercise}
\label{ex:information_theory:entropy_1_character_strings_language}
Consider the language of $1$-character strings over $\{\tA,\tB,\tC,\tD\}$
with associated probabilities $1/3$, $1/12$, $1/4$, and $1/3$.  What is
its corresponding entropy?
\end{exercise}
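
To illustrate the computation on different numbers (chosen here only as an
example, not the exercise data): a $1$-character language over
$\{\tA,\tB,\tC\}$ with probabilities $1/2$, $1/4$, $1/4$ has entropy
$$
H = \frac{1}{2}\log_2 2 + \frac{1}{4}\log_2 4 + \frac{1}{4}\log_2 4
  = \frac{1}{2}+\frac{1}{2}+\frac{1}{2} = \frac{3}{2}.
$$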

\begin{exercise}
\label{ex:information_theory:entropy_language_2_character_srings}
Consider the language $\rX^2$ of all strings of length $2$ in
$\{\tA,\tB,\tC,\tD\}$ defined by the probability function of
Exercise~\ref{ex:information_theory:entropy_1_character_strings_language}
and $2$-character independence: $P(xy) = P(x)P(y)$.
What is the entropy of this language?
\end{exercise}
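
A reasoning step that may help: under the independence assumption
$P(xy) = P(x)P(y)$ the logarithm splits, and
$$
H(\rX^2) = -\sum_{x,y} P(x)P(y)\bigl(\log_2 P(x)+\log_2 P(y)\bigr)
         = 2\,H(\rX),
$$
since $\sum_x P(x) = \sum_y P(y) = 1$.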

\begin{exercise}
\label{ex:information_theory:strings_length_2}
Let $\cM$ be the strings of length $2$ over $\{\tA,\tB,\tC,\tD\}$
with the following frequency distribution:
$$
\begin{array}{l}
P({\tt AA}) = 5/36 \\
P({\tt AB}) = 1/36 \\
P({\tt AC}) = 7/72 \\
P({\tt AD}) = 5/72 \\
\end{array}
\quad
\begin{array}{l}
P({\tt BA}) =  0    \\
P({\tt BB}) =  1/144 \\
P({\tt BC}) =  1/48 \\
P({\tt BD}) =  1/18 \\
\end{array}
\quad
\begin{array}{l}
P({\tt CA}) =  1/12 \\
P({\tt CB}) =  1/48 \\
P({\tt CC}) =  1/16 \\
P({\tt CD}) =  1/12 \\
\end{array}
\quad
\begin{array}{l}
P({\tt DA}) = 1/9  \\
P({\tt DB}) = 1/36 \\
P({\tt DC}) = 5/72 \\
P({\tt DD}) = 1/8  \\
\end{array}
$$
Show that the $1$-character frequencies in this language are the same
as for the language in
Exercise~\ref{ex:information_theory:entropy_language_2_character_srings}.
\end{exercise}
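
As one instance of the required check (writing $P_1$ for the
first-character frequency, notation introduced here for convenience):
$$
P_1(\tA) = \frac{5}{36}+\frac{1}{36}+\frac{7}{72}+\frac{5}{72}
         = \frac{10+2+7+5}{72} = \frac{1}{3},
$$
in agreement with $P(\tA) = 1/3$; the remaining seven marginal frequencies
are verified in the same way.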

\begin{exercise}
\label{ex:information_theory:entropy_each_language}
Do you expect the entropy of the language of
Exercise~\ref{ex:information_theory:strings_length_2} to be greater or
less than that of
Exercise~\ref{ex:information_theory:entropy_language_2_character_srings}?
What is the entropy of each language?
\end{exercise}
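
One general fact that bears on the first question is subadditivity of
entropy: for any joint distribution,
$$
H(X,Y) \le H(X) + H(Y),
$$
with equality if and only if $X$ and $Y$ are independent.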

\begin{exercise}
\label{ex:information_theory:infinite_language}
Consider the infinite language of all nonempty strings over the alphabet
$\{\tA\}$, with probability function defined such that
$P({\tt A\dots A}) = 1/2^n$, where $n \ge 1$ is the length of the string
${\tt A\dots A}$.  Show that the entropy of this language is $2$.
\end{exercise}
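
Since the string of length $n$ has probability $1/2^n$, its contribution to
the entropy is $n/2^n$, and the claim reduces to the standard series
$$
\sum_{n=1}^{\infty} \frac{n}{2^n} = 2,
$$
which can be evaluated, for example, by differentiating the geometric series
$\sum_{n\ge 0} t^n = 1/(1-t)$ and setting $t = 1/2$.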
